%% DESCRIPTION: Compute homogeneous subsidy levels shown in Figure 6 (top panels) in the main text.

%% INPUTS: 
% ./data/name_num.csv 
% ./data/[YEAR]_ID SIC SALES PROFIT RND EMPL FIXED_CAPITAL_TOT_COST.csv 
% ./data/[YEAR]_Compustat_US_and_global_TOT_SALES.csv
% ./data/[YEAR]_FIRM_ID NUM_PAT.csv
% ./data/[YEAR]_ID RND_STOCK.csv
% ./data/[YEAR]_cati_merged_sdc_rnd_output_fixed_effects.csv
% ./data/[YEAR]_adjacency_matrix.mat
% with YEAR = 1995,...,2005

%% OTHER ROUTINES USED: 
% initialize_homogeneous_subsidies.m 
% output_corner.m 
% welfare_corner_homogeneous_subsidies.m
% node_betweenness_faster.m (optional) % https://github.com/ayanonagon/mkse312_project/blob/master/MIT/node_betweenness_faster.m
% closeness.m (optional) % https://github.com/ayanonagon/mkse312_project/blob/master/MIT/closeness.m
% kcoreness_centrality_bu.m (optional) % https://sites.google.com/site/bctnet/Home/functions/kcoreness_centrality_bu.m?attredirects=0
% eigenvector_centrality_und.m (optional) % % https://sites.google.com/site/bctnet/Home/functions/eigenvector_centrality_und.m?attredirects=0

%% NOTE: updated 05/11/2018

clear all
close all

%% Parameters.
time = [1990:1:2005];
num_ranked_firms = 25; % Number of key player firms to be tracked.
show_key_player_ranking = true;
show_subsidies_firms = true;
pat_prox = 'jaffe'; % Choose from: 'jaffe' or 'mahalanobis'
welfare_scale_factor = 10^(-10);

load_betw_centrality = true;
load_closeness_centrality = true;
load_kcoreness_centrality = true;
load_eigenvector_centrality = true;

opt_alg = 'fmincon'; % Choose from: 'nm' 'fmincon' 'fminunc' or 'sa'
global options_fmincon;
global options_QP;
global options_NM;
global options_fminunc;
global options_SA;
options_fmincon = optimset('MaxIter', 1e10,'TolX',1e-5,'MaxFunEvals',100,'Display','off');
options_QP = optimset('Display','off');
options_NM = optimset('MaxIter', 1e10, 'TolX', 1e-5,'Display','off');
options_SA = optimset('MaxIter', 1e10, 'TolX', 1e-5,'Display','off');
options_fminunc = optimset('MaxIter', 1e10, 'TolX', 1e-5,'Display','off');

tic

y_col = flipud(winter(length(time))); 
for s=1:length(time)
    disp(['\definecolor{col' num2str(s) '}{rgb}{' num2str(y_col(s,1)) ',' num2str(y_col(s,2)) ',' num2str(y_col(s,3)) '}']);
end
for s=1:length(time)
    disp(['\colorbox{col' num2str(s) '!50}{' num2str(time(s)) '} \\']);
end

col = winter(num_ranked_firms);
for i=1:num_ranked_firms
    disp(['\definecolor{col' num2str(i) '}{rgb}{' num2str(col(i,1)) ',' num2str(col(i,2)) ',' num2str(col(i,3)) '}']);
end
for i=1:num_ranked_firms
    disp(['\colorbox{col' num2str(i) '!50}{' num2str(i) '} \\']);
end
h_subsidy = figure();

%% Load firm names and indices data file.
disp('Load firm names and indices data file...');
file = ['./data/name_num.csv'];
fid  = fopen(file);
C = textscan(fid,'%s %s','delimiter',',','headerlines',0);
fclose(fid);
name = strtrim(C{1});
for i=1:length(name)
    name{i} = strrep(name{i}, '&', '\&');
end
num_of_firm = str2double(strtrim(C{2}));
clear('C')

%% Parameter estimates: output = phi*A*output - rho*market_output + constant + rnd_stock*beta + error.
phi = 0.0106;
phi_se = 0.0051;
lambda_tech = 0;
lambda_tech_se = 0;
rho = 0.0189;
rho_se = 0.0028;
rho_tstat = abs(rho./rho_se);
coeff_rnd_stock = 0.0027;
coeff_rnd_stock_se = 0.0002;
subsidy = zeros(length(time),1);
delta_W = zeros(length(time),1);
aggr_subsidy = zeros(length(time),1);
aggr_output = zeros(length(time),1);
hessian_pos_definite = zeros(length(time),1);
prod_surplus = zeros(length(time),1);

%% Iterating over different periods.
for s=1:length(time)
    
    disp(['Year: ' num2str(time(s))]);
    
    %% Load SIC codes and sales.
    disp(['Load SIC codes and sales...']);
    fid = fopen(['./data/' num2str(time(s)) '_ID SIC SALES PROFIT RND EMPL FIXED_CAPITAL TOT_COST.csv']);
    C = textscan(fid, '%s %s %s %s %s %s %s %s', 'delimiter', ';', 'headerlines', 1); % ID; SIC; SALES; PROFIT; RND; EMPL; FIXED_CAPITAL; TOT_COST
    fclose(fid);
    id_of_firm = str2double(strtrim(C{1}));
    sic = str2double(strtrim(C{2}));
    sales = str2double(strtrim(C{3}));
    clear('C');
    
    %% Load total sales.
    disp(['Load total sales...']);
    fid = fopen(['./data/' num2str(time(s)) '_Compustat_US_and_global_TOT_SALES.csv']); % SIC; TOT_SALES
    C = textscan(fid, '%s %s', 'delimiter', ';', 'headerlines', 1);
    fclose(fid);
    sic_of_sector = str2double(strtrim(C{1}));
    sales_of_sector = str2double(strtrim(C{2}));
    clear('C');
    
    %% Load the number of patents.
    disp(['Load the number of patents...']);
    fid = fopen(['./data/' num2str(time(s)) '_FIRM_ID NUM_PAT.csv']); % FIRM_ID, NUM_PAT
    C = textscan(fid, '%s %s', 'delimiter', ',', 'headerlines', 1);
    fclose(fid);
    pat_firm_id = str2double(strtrim(C{1}));
    num_pat = str2double(strtrim(C{2}));
    clear('C');
    
    %% Load R&D stocks.
    disp(['Load R&D stocks...']);
    fid = fopen(['./data/' num2str(time(s)-1) '_ID RND_STOCK.csv']);
    C = textscan(fid, '%s %s', 'delimiter', ';', 'headerlines', 1); % ID; RND_STOCK
    fclose(fid);
    rnd_stock_id_of_firm = str2double(strtrim(C{1}));
    rnd_stock = str2double(strtrim(C{2}));
    clear('C');
    
    %% Compute market shares.
    market_share = zeros(length(sales),1);
    disp(['Compute market_shares...']);
    for i=1:length(sales)
        ind = find(sic_of_sector==sic(i));
        if(~isnan(sales_of_sector(ind)))
            market_share(i) = sales(i)/sales_of_sector(ind);
        end
    end
    
    %% Load estimated firm fixed effects.
    disp('Load firm fixed effects...');
    fid = fopen(['./data/' num2str(time(s)) '_cati_merged_sdc_rnd_output_fixed_effects.csv']);
    dat = textscan(fid, '%s %s', 'delimiter', ',', 'headerlines', 0); % firm_id, firm_fe
    fclose(fid);
    firm_fe_id_of_firm = str2double(strtrim(dat{1}));
    firm_fe = str2double(strtrim(dat{2}));
    clear('dat');
    
    %% Load the adjacency matrix.
    disp(['Open data file...' 'data/' num2str(time(s)) '_adjacency_matrix.mat']);
    load(['./data/' num2str(time(s)) '_adjacency_matrix.mat']);
    
    %% Load the patent proximity matrix P.
    file = ['./data/' num2str(time(s)) '_' pat_prox '_proximity.mat'];
    load(file);
    
    %% Get the degree of each firm.
    disp(['Compute degree centrality...']);
    deg_centrality = full(sum(A));
    
    %% Get the betweenness centrality of each firm.
    if(load_betw_centrality)
        disp(['Load betweenness centrality...']);
        load(['data/' num2str(time(s)) '_betweenness_centrality.mat']);
    else
        disp(['Compute betweenness centrality...']);
        betw_centrality = node_betweenness_faster(A); % https://github.com/ayanonagon/mkse312_project/blob/master/MIT/node_betweenness_faster.m
        save(['data/' num2str(time(s)) '_betweenness_centrality.mat']);
    end
    
    %% Get the closeness centrality of each firm.
    if(load_closeness_centrality)
        disp(['Load closeness centrality...']);
        load(['data/' num2str(time(s)) '_closeness_centrality.mat']);
    else
        disp(['Compute closeness centrality...']);
        closeness_centrality = closeness(A); % https://github.com/ayanonagon/mkse312_project/blob/master/MIT/closeness.m
        save(['data/' num2str(time(s)) '_closeness_centrality.mat']);
    end
    
    %% Get the coreness centrality of each firm.
    if(load_kcoreness_centrality)
        disp(['Load coreness centrality...']);
        load(['data/' num2str(time(s)) '_kcoreness_centrality.mat']);
    else
        disp(['Compute coreness centrality...']);
        kcoreness_centrality = kcoreness_centrality_bu(A); % https://sites.google.com/site/bctnet/Home/functions/kcoreness_centrality_bu.m?attredirects=0
        save(['data/' num2str(time(s)) '_kcoreness_centrality.mat']);
    end
    
    %% Get the eigenvector centrality of each firm.
    if(load_eigenvector_centrality)
        disp(['Load eigenvector centrality...']);
        load(['data/' num2str(time(s)) '_eigenvector_centrality.mat']);
    else
        disp(['Compute eigenvector centrality...']);
        eigenvector_centrality = eigenvector_centrality_und(A); % https://sites.google.com/site/bctnet/Home/functions/eigenvector_centrality_und.m?attredirects=0
        save(['data/' num2str(time(s)) '_eigenvector_centrality.mat']);
    end
    
    %% Drop missing observations.
    disp(['Drop missing observations...']);
    missing = ones(length(id_of_firm),1);
    for i=1:length(id_of_firm)
        if(~isempty(find(firm_fe_id_of_firm==id_of_firm(i))))
            missing(i) = 0;
        end
    end
    id_of_firm = id_of_firm(~missing);
    sic = sic(~missing);
    sales = sales(~missing);
    market_share = market_share(~missing);
    rnd_stock = rnd_stock(~missing);
    A = A(~missing,:);
    A = A(:,~missing);
    A = full(+A);
    d = full(sum(A));
    P = P(~missing,:);
    P = P(:,~missing);
    P = full(+P);
    n = sum(~missing);
    deg_centrality = deg_centrality(~missing); 
    betw_centrality = betw_centrality(~missing);
    closeness_centrality = closeness_centrality(~missing);
    kcoreness_centrality = kcoreness_centrality(~missing);
    eigenvector_centrality = eigenvector_centrality(~missing);
    
    %% Compute firm specific weights.
    disp(['Compute firm specific weights...']);
    mu = zeros(n,1);
    for i=1:n
        ind = find(firm_fe_id_of_firm==id_of_firm(i));
        mu(i) = firm_fe(ind) + rnd_stock(i)*coeff_rnd_stock;
    end
    
    %% Compute the B matrix.
    disp(['Compute the B matrix...']);
    B = [];
    for i=1:n
        ind = sic(i)==sic;
        B = [B; ind'];
    end
    B = max(B,B');
    for i=1:n
        B(i,i) = 0;
    end
    
    %% Compute the inter-centralities.
    disp('Compute the intercentralities...');
    n = length(A);
    H = eye(n) + rho*B - phi*A - lambda_tech*P;
    hessian_pos_definite(s) = isempty(find(eig(H)<0)); % Record if Hessian is postive definite.
    M = inv(H);
    int_centrality = zeros(1,n);
    b_mu = M*mu; % Vector of weighted Bonacich centralities.
    b_u = M*ones(n,1); % Vector of Bonacich centralities.
    for i=1:n
        if(M(i,i)>0)
            int_centrality(i) = b_u(i)*b_mu(i)/M(i,i);
        end
    end
    aggr_int_centrality = sum(int_centrality);
    
    %% Compute initial subsidies for optimization algorithm.
    disp(['Compute initial subsidies for optimization algorithm...'])
    x_0 = initialize_homogeneous_subsidies(mu,B,A,P,rho,phi,lambda_tech);
    q_0 = x_0(1:n);
    s_0 = x_0(n+1);
    
    %% Compute the optimal subsidies.
    switch opt_alg
                    
        case 'fmincon'
            
            disp(['Compute the optimal subsidies using a constrained optimization algorithm (fmincon)...'])
            lb = 0;
            [x,fval] = fmincon(@(s) -welfare_scale_factor*welfare_corner_homogeneous_subsidies(mu,B,A,P,rho,phi,lambda_tech,s),s_0,[],[],[],[],lb,[],[]);
            
        case 'fminunc'

            disp(['Compute the optimal subsidies using an unconstrained optimization algorithm (fminunc)...'])
            [x,fval] = fminunc(@(s) -welfare_scale_factor*welfare_corner_homogeneous_subsidies(mu,B,A,P,rho,phi,lambda_tech,s),s_0,options_fminunc);
            
        case 'nm'
            
            disp(['Compute the optimal subsidies using an unconstrained Nelder-Mead algorithm...'])
            [x,fval] = fminsearch(@(s) -welfare_scale_factor*welfare_corner_homogeneous_subsidies(mu,B,A,P,rho,phi,lambda_tech,s),s_0,options_NM);
            
        case 'sa'
            
            disp(['Compute the optimal subsidies using an unconstrained simulated annealing algorithm...'])
            [x,fval,exitFlag,out] = simulannealbnd(@(s) -welfare_scale_factor*welfare_corner_homogeneous_subsidies(mu,B,A,P,rho,phi,lambda_tech,s),s_0,[],[]);
            
        otherwise
            
            error('No optimization algorithm specified.')
    end
    
    subsidy(s) = x;
    W_subsidy = -fval/welfare_scale_factor;
    
    %% Compute output levels and producer surplus with the optimal subsidy.
    q_sub = output_corner(mu,B,A,P,rho,phi,lambda_tech,subsidy(s)*ones(n,1));
    prod_surplus(s) = 1/2*q_sub'*q_sub;
    
    %% Compute aggregates.
    disp(['Compute aggregates...'])
    aggr_subsidy(s) = subsidy(s)*sum(q_sub) + n*subsidy(s)^2; % Compute the aggregate subsidies.
    aggr_output(s) = sum(q_sub);
    
    %% Compute output in the benchmark.
    disp(['Compute output in the benchmark...'])
    M = inv(eye(n)+rho*B-phi*A-lambda_tech*P);
    %q_bar = M*mu;
    q_bar = output_corner(mu,B,A,P,rho,phi,lambda_tech,zeros(n,1));
    
    %% Compute welfare in the benchmark.
    disp(['Compute welfare in the benchmark...'])
    W = q_bar'*q_bar + rho/2*q_bar'*B*q_bar;
    
    %% Compute the relative increase in welfare due to the subsidy.
    disp(['Compute the relative increase in welfare due to the subsidy...'])
    delta_W(s) = (W_subsidy - W)/W;
    
    disp(['Welfare gain/loss: ' num2str(100*delta_W(s)) ' %'])
    
    %% Plot the subsidies over firms.
    if(show_subsidies_firms)
        figure(h_subsidy);
        set(0,'defaulttextInterpreter','latex')
        set(gca,'Layer','top')
        set(gca,'FontSize',20);
        box on
        set(gca,'YScale','log')
        dummy = sort((q_sub+subsidy(s))/(mean(q_sub)+subsidy(s)),'descend');
        dummy = dummy(dummy>eps);
        plot(dummy,'-o','LineWidth',1.5,'color',y_col(s,:));
        hold on
        xlabel('firm $i$');
        ylabel('$e_i^* / \frac{1}{n} \sum_{j=1}^n e_j^*$');
        if(s==length(time))
            axis tight
            set(gca,'FontSize',20);
            print('-depsc',strcat(num2str(time(1)),'_',num2str(time(end)),'_',pat_prox,'_',opt_alg,'_hom_subsidies.eps'));
        end
    end
    
    %% Compute the subsidies ranking.
    disp('Compute the subsidies ranking...');
    [key_players_subsidy key_players] = sort(q_sub,'descend');
    
    %% Save the ranks for the num_ranked_firms key player firms.
    if(s==1)
        disp('');
        for i=1:num_ranked_firms
            ind = id_of_firm(key_players(i));
            key_firms(i) = ind;
            key_firms_name{i} = name{ind};
            if(i<10)
                disp(['\colorbox{col' num2str(i) '!50}{\phantom{0}' num2str(i) '} & ' key_firms_name{i} '\\']);
            else
                disp(['\colorbox{col' num2str(i) '!50}{' num2str(i) '} & ' key_firms_name{i} '\\']);
            end
        end
        disp('');
    end
    num_of_firm_of_key_player = zeros(length(key_players),1);
    for i=1:length(key_players)
        num_of_firm_of_key_player(i) = id_of_firm(key_players(i));
    end
    for i=1:num_ranked_firms
        rank = find(num_of_firm_of_key_player==key_firms(i));
        if(~isempty(rank))
            key_firms_rank(i,s) = rank;
        else
            key_firms_rank(i,s) = NaN;
        end
    end
    
    %% Write the total homogeneous subsidies to a file.
    disp(['Write the total homogeneous subsidies to a file...'])
    fid = fopen(['data/' num2str(time(s)) '_' pat_prox '_' opt_alg '_hom_subsidies.csv'],'w');
    fprintf(fid,'firm_id, subsidy \n');
    for i=1:length(q_sub)
        fprintf(fid,'%i, %f \n', id_of_firm(i), (q_sub(i)+subsidy(s))*subsidy(s));
    end
    fclose(fid);
    
    %% Write the ranking to a dat file.
    fid = fopen(['data/' num2str(time(s)) '_' pat_prox '_' opt_alg '_hom_subsidies_ranking.dat'],'w');
    for i=1:length(key_players)
        ind = id_of_firm(key_players(i));
        pat_ind = find(pat_firm_id==ind);
        if(~isempty(pat_ind))
            num_patents = num_pat(pat_ind);
        else
            num_patents = 0;
        end
        fprintf(fid,'%s \t & %0.4f \t & %i \t & %i \t & %0.4f \t & %0.4f \t & %0.4f \t & %0.4f \t & %0.4f \t & %0.4f \t & %i \\\\ \n', ...
            name{ind}, ...
            100*market_share(key_players(i)), ...
            num_patents, ...
            deg_centrality(key_players(i)), ...
            eigenvector_centrality(key_players(i)), ...
            betw_centrality(key_players(i)), ...
            closeness_centrality(key_players(i)), ...
            100*q_sub(key_players(i))/sum(q_sub), ...
            100*int_centrality(key_players(i))/sum(b_mu), ...
            (q_sub(key_players(i))+subsidy(s))/(mean(q_sub)+subsidy(s)), ...
            i);
    end
    fclose(fid);
    
    %% Write the ranking to a csv file.
    fid = fopen(['data/' num2str(time(s)) '_' pat_prox '_' opt_alg '_hom_subsidies_ranking.csv'],'w');
    fprintf(fid,'num_of_firm, name, market_share, num_pat, degree, coreness, eigvec, betweenness, closeness, output, inter_centrality, q_subsidy, rank \n');
    for i=1:length(key_players)
        ind = id_of_firm(key_players(i));
        pat_ind = find(pat_firm_id==ind);
        if(~isempty(pat_ind))
            num_patents = num_pat(pat_ind);
        else
            num_patents = 0;
        end
        fprintf(fid,'%i, %s, %0.4f, %i, %i, %i, %0.4f, %0.4f, %0.4f, %0.4f, %0.4f, %0.4f, %i \n', ...
            ind, ...
            name{ind}, ...
            100*market_share(key_players(i)), ...
            num_patents, ...
            deg_centrality(key_players(i)), ...
            kcoreness_centrality(key_players(i)), ...
            eigenvector_centrality(key_players(i)), ...
            betw_centrality(key_players(i)), ...
            closeness_centrality(key_players(i)), ...
            100*q_sub(key_players(i))/sum(q_sub), ...
            100*int_centrality(key_players(i))/sum(b_mu), ...
            q_sub(key_players(i))/mean(q_sub), ...
            i);
    end
    fclose(fid);
    
end

disp(['Number of years for which Hessian is not positive definite: ' num2str(length(find(hessian_pos_definite==0)))])

if(length(time)>1)
    
    %% Plotting the subsidy levels over time.
    figure()
    set(gca,'Layer','top')
    set(gca,'FontSize',20);
    box on
    plot(time,100*aggr_subsidy./aggr_subsidy(1),'-ok','LineWidth',2,'MarkerSize',10,'MarkerFaceColor','auto');
    set(0,'defaulttextinterpreter','Latex')
    xlabel('year');
    ylabel('$s^* \|\mathbf{e}\|_1 [\%]$');
    axis tight;
    set(gca,'FontSize',20);
    print('-depsc',strcat(num2str(time(1)),'_',num2str(time(end)),'_',pat_prox,'_',opt_alg,'_hom_subsidy.eps'));
    
    %% Plotting the aggregate output levels over time.
    figure()
    set(gca,'Layer','top')
    set(gca,'FontSize',20);
    box on
    plot(time,aggr_output,'-ok','LineWidth',2,'MarkerSize',10,'MarkerFaceColor','auto');
    set(0,'defaulttextinterpreter','Latex')
    xlabel('year');
    ylabel('$\|\mathbf{q}^*\|_1$');
    axis tight;
    set(gca,'FontSize',20);
    print('-depsc',strcat(num2str(time(1)),'_',num2str(time(end)),'_',pat_prox,'_',opt_alg,'_hom_subsidy_aggr_output.eps'));
    
    %% Plot the percentage increase in welfare due to the subsidy over time.
    figure()
    set(gca,'Layer','top')
    set(gca,'FontSize',20);
    box on
    plot(time,100*delta_W,'-ok','LineWidth',2,'MarkerSize',10,'MarkerFaceColor','auto');
    set(0,'defaulttextinterpreter','Latex')
    xlabel('year');
    ylabel('$\frac{\bar{W}(G,s^*)-W(G)}{W(G)}$ $[\%]$');
    axis tight;
    ylim([0 max(110*delta_W)])
    set(gca,'FontSize',20);
    print('-depsc',strcat(num2str(time(1)),'_',num2str(time(end)),'_',pat_prox,'_',opt_alg,'_delta_welfare_hom_subsidy.eps'));
    
end

t=toc; % Stopping the timer.
disp(datestr(datenum(0,0,0,0,0,t),'HH:MM:SS'))

